#Replication material for "Attention Deficit Disorder"
#iga and jvk
#2-9-2022

#Automated Content Analysis Replication Materials

#FOX News: Hannity Transcripts

#Required packages
require(tidyverse)
require(tm)
require(lubridate)
require(ggthemes)
require(tidytext)

#read in data
# stringsAsFactors = FALSE keeps `text` and `date` as character vectors on
# R < 4.0 as well; the later paste0() and string comparisons on `date` use them
# as plain strings.
dat <- read.csv("auto_content_analysis_hannity.csv", stringsAsFactors = FALSE)

#visualize distribution (Appendix Fig. F1, top panel)
# `data` is spelled out in full (the original relied on partial matching of
# `dat`), and the plot is printed explicitly so that it is also drawn when the
# script is run through source() rather than line by line.
print(
  ggplot() +
    geom_histogram(data = dat, aes(x = as.Date(date)), bins = 50) +
    theme_few() +
    ggtitle("Histogram, FOX Hannity Transcripts over Time")
)


#rescale dataframe to weekly data
# One row per `week`: the texts of all rows in that week are concatenated
# (no separator) and the week keeps the `date` of its first row.
# NOTE(review): with collapse = "" the last word of one transcript is glued to
# the first word of the next one; left unchanged so the counts stay the same.
dat2 <- summarize(
  group_by(dat, week),
  text = paste0(text, collapse = ""),
  date = first(date)
)


#create vector of deficit mentions

#step 1: clean corpus and create DTM
# One corpus document per weekly row of dat2; punctuation and digits are
# removed and the text is lower-cased before counting.
corp <- VCorpus(VectorSource(dat2$text))
corp <- tm_map(corp, removePunctuation)
corp <- tm_map(corp, removeNumbers)
corp <- tm_map(corp, content_transformer(tolower))

# Only two unigrams are read from the matrix, so the DTM is tabulated against
# a dictionary instead of the full vocabulary. The dense as.matrix() conversion
# then holds two columns rather than one per distinct word, and a term that
# never occurs gives a column of zeros instead of an "undefined columns" error.
dtm <- DocumentTermMatrix(corp,
                          control = list(dictionary = c("deficit", "deficits")))
dtm <- as.matrix(dtm)
dtm <- data.frame(dtm)

# per-week counts of the singular and the plural form
deficit <- dtm[, "deficit"]
deficit2 <- dtm[, "deficits"]

#create vector of 'national debt'
# Tokenizer for DocumentTermMatrix(): splits a document into words, forms all
# consecutive word pairs and returns them as "first second" strings.
BigramTokenizer <- function(x) {
  wordPairs <- ngrams(words(x), 2)
  joined <- lapply(wordPairs, function(pair) paste(pair, collapse = " "))
  unlist(joined, use.names = FALSE)
}

# Bigram counts per weekly document. The DTM is restricted to the seven
# phrases read below via the `dictionary` control: densifying the complete
# bigram vocabulary with as.matrix() can need a very large amount of memory,
# and a phrase that never occurs now gives a zero column instead of an error.
bigramTerms <- c("national debt", "the debt", "our debt", "debt ceiling",
                 "trade deficit", "deficit in", "deficits in")
dtm <- DocumentTermMatrix(corp,
                          control = list(tokenize = BigramTokenizer,
                                         dictionary = bigramTerms))
dtm <- as.matrix(dtm)
# data.frame() makes the column names syntactic: "national debt" becomes
# "national.debt", which is the spelling used for indexing below.
dtm <- data.frame(dtm)

debt <- dtm[, "national.debt"]
debt2 <- dtm[, "the.debt"]
debt3 <- dtm[, "our.debt"]
debtCeiling <- dtm[, "debt.ceiling"]   # extracted but not used further down
tradeDeficit <- dtm[, "trade.deficit"]
deficitIn <- dtm[, "deficit.in"]
deficitsIn <- dtm[, "deficits.in"]

#create sum of token mentions
# deficit: singular + plural counts, minus the "trade deficit", "deficit in"
# and "deficits in" bigram counts
deficit <- deficit + deficit2 - tradeDeficit - deficitIn - deficitsIn
# debt: "national debt" + "the debt" + "our debt"
debt <- debt + debt2 + debt3

# number of characters in each weekly document
dat2$nchar <- nchar(dat2$text)

# attach both count vectors as columns `deficit` and `debt`
dat2 <- cbind(dat2, deficit = deficit, debt = debt)

#scale mentions against average text length
# NOTE(review): 105337.3 is presumably the mean weekly character count; it is
# hard-coded here, so confirm it against the data.
dat2$mentions <- (dat2$deficit + dat2$debt) * (105337.3 / dat2$nchar)

#remove any empty cells
# weeks whose text has zero characters are dropped
dat2 <- dat2[dat2$nchar != 0, ]

#create cutoffs for administrations
# Sort the weekly rows by date, then print the rows dated after 31 Dec and
# before 31 Jan for four different years. The trailing "item" notes are the
# author's recorded row positions.
# NOTE(review): the comparisons are against strings, so this assumes `date`
# holds ISO yyyy-mm-dd text; confirm.
dat2 <- dat2 %>% arrange(date)

janRows2014 <- dat2$date > '2013-12-31' & dat2$date < '2014-01-31'
dat2[janRows2014, ] #item 258

janRows2016 <- dat2$date > '2015-12-31' & dat2$date < '2016-01-31'
dat2[janRows2016, ] #item 362

janRows2017 <- dat2$date > '2016-12-31' & dat2$date < '2017-01-31'
dat2[janRows2017, ] #item 415 = inauguration

janRows2018 <- dat2$date > '2017-12-31' & dat2$date < '2018-01-31'
dat2[janRows2018, ] #item 464

#pre-post inauguration: 360-415 vs. 416-464
# calendar year of each weekly row, used as the grouping key afterwards
dat2[["year"]] <- lubridate::year(dat2[["date"]])

#final data object for graphical presentation: group by year
# Per year: the sum of the scaled weekly mentions and their standard deviation,
# both divided by 52, plus the number of weekly rows. `lo`/`hi` are
# mention -/+ 1.96 * mentionsd / sqrt(n).
# NOTE(review): dividing the sum by 52 treats every year as having 52 weeks,
# whatever `n` is, and the standard deviation is divided by 52 as well before
# the sqrt(n) step, so the interval is much narrower than a conventional
# sd/sqrt(n) interval around a weekly mean. Confirm this is intended.
dat3 <- dat2 %>%
  group_by(year) %>%
  summarize(mention = sum(mentions) / 52,
            mentionsd = sd(mentions) / 52,
            n = n()) %>%
  mutate(lo = (mention - 1.96 * mentionsd / sqrt(n)),
         hi = (mention + 1.96 * mentionsd / sqrt(n)))

# years 2017-2020 are labelled Trump, every other year Obama
dat3$Administration <- car::recode(dat3$year,"2017:2020='Trump Administration';
                                   else='Obama Administration'")

# plot title with the show name in italics
plotTitle <- expression(paste("Fox News' ", italic("Hannity")))

# keep a labelled copy of the Hannity summary for the combined figure
datH <- mutate(dat3, show = "Hannity")


#First half of Fig. F2
# Bar per year (height = mention, fill = Administration) with lo/hi error bars.
a <- ggplot()+
  geom_bar(data=dat3,aes(x=year,y=mention,fill=Administration),stat="identity")+
  geom_errorbar(data=dat3,aes(x=year,ymin=lo,ymax=hi),width=0.1)+
  theme_few()+
  scale_fill_grey()+
  ylab("Debt and Deficit Mentions per Week")+
  ggtitle(plotTitle)+
  #ylim(c(0,250))+
  xlab("Year")

# windows() only exists in Windows builds of R and fails elsewhere with
# "could not find function"; dev.new() opens the platform's default device.
# noRStudioGD = TRUE keeps the plot in a separate window under RStudio, as
# windows() did. print() makes the plot appear under source() as well.
dev.new(noRStudioGD = TRUE)
print(a)





##MADDOW

#read in data

# stringsAsFactors = FALSE keeps `text` and `date` as character vectors on
# R < 4.0 as well.
dat <- read.csv("auto_content_analysis_maddow.csv", stringsAsFactors = FALSE)


#visualize histogram of transcripts over time (Fig. F1 bottom panel)
# `data` is spelled out in full instead of relying on partial matching of `dat`.
a <- ggplot() +
  geom_histogram(data = dat, aes(x = as.Date(date)), bins = 50) +
  theme_few() +
  ylim(c(0, 80)) +
  ggtitle("Histogram, MSNBC Rachel Maddow Transcripts over Time")

# The histogram used to be stored in `a` and never drawn, so the bottom panel
# of Fig. F1 was not produced; print it like the Hannity histogram.
print(a)


# Weekly data: one row per `week`, holding the concatenation (no separator)
# of all texts in that week and the `date` of the week's first row.
# NOTE(review): with collapse = "" the last word of one transcript is glued to
# the first word of the next one; left unchanged so the counts stay the same.
dat2 <- summarize(
  group_by(dat, week),
  text = paste0(text, collapse = ""),
  date = first(date)
)


#create vector of deficit mentions
# One corpus document per weekly row of dat2; punctuation and digits are
# removed and the text is lower-cased before counting.
corp <- VCorpus(VectorSource(dat2$text))
corp <- tm_map(corp, removePunctuation)
corp <- tm_map(corp, removeNumbers)
corp <- tm_map(corp, content_transformer(tolower))

# Only two unigrams are read from the matrix, so the DTM is tabulated against
# a dictionary instead of the full vocabulary. The dense as.matrix() conversion
# then holds two columns rather than one per distinct word, and a term that
# never occurs gives a column of zeros instead of an "undefined columns" error.
dtm <- DocumentTermMatrix(corp,
                          control = list(dictionary = c("deficit", "deficits")))
dtm <- as.matrix(dtm)
dtm <- data.frame(dtm)

# per-week counts of the singular and the plural form
deficit <- dtm[, "deficit"]
deficits <- dtm[, "deficits"]

#create vector of 'national debt'
# Tokenizer for DocumentTermMatrix(): every pair of consecutive words in the
# document is returned as one "first second" string.
BigramTokenizer <- function(x) {
  docWords <- words(x)
  pairList <- ngrams(docWords, 2)
  pairStrings <- lapply(pairList, function(p) paste(p, collapse = " "))
  unlist(pairStrings, use.names = FALSE)
}

# Bigram counts per weekly document. The DTM is restricted to the seven
# phrases read below via the `dictionary` control: densifying the complete
# bigram vocabulary with as.matrix() can need a very large amount of memory,
# and a phrase that never occurs now gives a zero column instead of an error.
bigramTerms <- c("national debt", "the debt", "our debt", "debt ceiling",
                 "trade deficit", "deficit in", "deficits in")
dtm <- DocumentTermMatrix(corp,
                          control = list(tokenize = BigramTokenizer,
                                         dictionary = bigramTerms))
dtm <- as.matrix(dtm)
# data.frame() makes the column names syntactic: "national debt" becomes
# "national.debt", which is the spelling used for indexing below.
dtm <- data.frame(dtm)

debt <- dtm[, "national.debt"]
debt2 <- dtm[, "the.debt"]
debt3 <- dtm[, "our.debt"]
debtCeiling <- dtm[, "debt.ceiling"]   # extracted but not used further down
tradeDeficit <- dtm[, "trade.deficit"]
deficitIn <- dtm[, "deficit.in"]
deficitsIn <- dtm[, "deficits.in"]


# deficit: singular + plural counts, minus the "trade deficit", "deficit in"
# and "deficits in" bigram counts
deficit <- deficit + deficits - tradeDeficit - deficitIn - deficitsIn
# debt: "national debt" + "the debt" + "our debt"
debt <- debt + debt2 + debt3

#calculating mentions

# number of characters in each weekly document
dat2$nchar <- nchar(dat2$text)

# attach both count vectors as columns `deficit` and `debt`
dat2 <- cbind(dat2, deficit = deficit, debt = debt)

# scale the combined count by 105337.3 / document length
# NOTE(review): 105337.3 is the same hard-coded constant used for the Hannity
# data; confirm that sharing it across both shows is intended.
dat2$mentions <- (dat2$deficit + dat2$debt) * (105337.3 / dat2$nchar)

# weeks whose text has zero characters are dropped
dat2 <- dat2[dat2$nchar != 0, ]

# calendar year of each weekly row, used as the grouping key below
# NOTE(review): the original comment here quoted row ranges 360-415 / 416-464;
# nothing in this section uses them.
dat2[["year"]] <- lubridate::year(dat2[["date"]])

# Per year: the sum of the scaled weekly mentions and their standard deviation,
# both divided by 52, plus the number of weekly rows. `lo`/`hi` are
# mention -/+ 1.96 * mentionsd / sqrt(n).
# NOTE(review): dividing the sum by 52 treats every year as having 52 weeks,
# whatever `n` is, and the standard deviation is divided by 52 as well before
# the sqrt(n) step, so the interval is much narrower than a conventional
# sd/sqrt(n) interval around a weekly mean. Confirm this is intended.
dat3 <- dat2 %>%
  group_by(year) %>%
  summarize(mention = sum(mentions) / 52,
            mentionsd = sd(mentions) / 52,
            n = n()) %>%
  mutate(lo = (mention - 1.96 * mentionsd / sqrt(n)),
         hi = (mention + 1.96 * mentionsd / sqrt(n)))

# years 2017-2020 are labelled Trump, every other year Obama
dat3$Administration <- car::recode(dat3$year,"2017:2020='Trump Administration';
                                   else='Obama Administration'")

# title expression with the show name in italics (not referenced again in the
# visible part of the script)
plotTitle <- expression(paste("MSNBC's ", italic("The Rachel Maddow Show")))

# label the rows with the show name for facetting
dat3 <- mutate(dat3, show = "The Rachel Maddow Show")


# Stack the Maddow summary on top of the stored Hannity summary.
dat4 <- rbind(dat3, datH)

# Yearly bars with lo/hi error bars, one facet row per show.
b <- ggplot(data = dat4, mapping = aes(x = year)) +
  geom_bar(aes(y = mention, fill = Administration), stat = "identity") +
  geom_errorbar(aes(ymin = lo, ymax = hi), width = 0.1) +
  theme_few() +
  scale_fill_grey() +
  ylab("Debt and Deficit Mentions per Week") +
  facet_wrap(~show, ncol = 1) +
  xlab("Year")









#############################################
#combined graphic: Final version of Fig. F2

# library() stops immediately when gridExtra is missing; require() would only
# warn and let the grid.arrange() call fail afterwards.
library(gridExtra)

# windows() only exists in Windows builds of R; dev.new() opens the platform's
# default device with the same 8 x 8 inch size. noRStudioGD = TRUE is needed
# because the RStudio device does not accept width/height.
dev.new(width = 8, height = 8, noRStudioGD = TRUE)
grid.arrange(b, ncol = 1)


